#!/usr/bin/python
# -*- coding:UTF8 -*-
import sys

from lxml import etree
import requests
import os
import re
import io

# Re-wrap stdout so that everything printed by this script is encoded as
# GB18030 instead of the interpreter's default stdout encoding.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='gb18030')

# Listing page to crawl and the browser-like User-Agent header sent with
# every request.
url = 'https://kpxxg.com/gxsp/'
user_agent = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36'}

# Fetch the first listing page and parse it into an lxml element tree.
r = requests.get(url=url, headers=user_agent)
r.encoding = 'utf8'
analytical_data = etree.HTML(r.text)  # parse the data

# Derive the number of the last listing page from the "最后一页" (last page)
# link, whose href is expected to look like "117-<n>.html".  Default to a
# single page when the link is missing or its href has another shape, so
# tail_page is always an int.
tail_page = 1
last_page_hrefs = analytical_data.xpath('//a[text()="最后一页"]//@href')
if last_page_hrefs:
    # Raw string avoids the invalid "\d" escape; the dot is escaped so it
    # only matches a literal "." before "html".
    last_page_match = re.search(r'117-(\d+)\.html', last_page_hrefs[0])
    if last_page_match is not None:
        tail_page = int(last_page_match.group(1))
# Walk the listing pages and visit every video link found on each one.
# NOTE(review): the range is hard-coded to pages 1-2, and the loop variable
# reuses the name tail_page, so the last-page number computed above is
# overwritten and never used as the bound -- confirm whether
# range(1, tail_page + 1) was intended.
for tail_page in range(1, 3):
    if tail_page == 1:
        # The first page is served from the bare listing URL.
        url = 'https://kpxxg.com/gxsp/'
    else:
        # Other pages are addressed as "117-<page>.html" under the listing URL.
        url = 'https://kpxxg.com/gxsp/' + '117-' + str(tail_page) + '.html'
    r = requests.get(url=url, headers=user_agent)
    r.encoding = 'utf8'
    analytical_data = etree.HTML(r.text)  # parse the data
    # video_name = analytical_data.xpath('//h2[@class="s2"]/a/text()')
    # Collect every href found under <h2 class="s2"> elements, then prefix
    # each with the site origin to build the URL that is requested below.
    video_url = analytical_data.xpath('//h2[@class="s2"]//@href')
    video_url=['https://kpxxg.com'+video_url for video_url in video_url ]
    # print(video_url)
    # NOTE(review): j is not used in the visible part of this loop;
    # presumably a counter consumed further down -- verify.
    j=0
    # Fetch and parse each linked page.  This rebinds url, r and
    # analytical_data, replacing the listing-page values set above.
    for url in video_url:
        r = requests.get(url=url, headers=user_agent)
        r.encoding = 'utf8'
        analytical_data = etree.HTML(r.text)  # parse the data